
********************************************************************************
cd $pathdata_processed
use terminales_fin, clear

* Express chance, expected_gpa and predicted_gpa in units of the standard
* deviation of chance. The SD is captured once in a local so that all three
* divisions visibly use the same number instead of relying on r(sd) staying
* in memory between commands.
summarize chance
local chance_sd `r(sd)'
foreach v of varlist chance expected_gpa predicted_gpa {
	replace `v' = `v'/`chance_sd'
}

cd $pathgraph
* Split the sample in two groups (nq(2)) by the value of chance.
* NOTE(review): xtile() as an egen function is presumably provided by the
* user-written egenmore package -- confirm it is installed.
egen chanceq = xtile(chance), nq(2)
********************************************************************************
* Bin width used to discretise high school GPA (stdvide3).
global b 0.5

* stdvide3q holds, for every observation, the upper edge of the half-open
* bin (lower, lower + $b] that contains stdvide3. Lower edges run from -6 to
* 3 in steps of $b; observations outside (-6, 3 + $b] or with missing
* stdvide3 keep a missing bin.
generate stdvide3q = .
summarize stdvide3

forvalues lower = -6($b)3 {
	quietly replace stdvide3q = `lower'+$b ///
		if stdvide3 <= (`lower'+$b) & stdvide3 > (`lower')
}

* Round-trip through a two-decimal string so the bin labels are clean
* values, then put the result back under the original name.
generate stdvide3q2 = string(stdvide3q, "%6.2f")
destring stdvide3q2, replace
drop stdvide3q
rename stdvide3q2 stdvide3q

* N   = number of observations in each (chanceq, GPA bin) cell.
* tot = number of observations in each chanceq group.
bysort chanceq stdvide3q: gen N = _N
bysort chanceq: gen tot = _N

* Collapse to one row per (chanceq, GPA bin) cell.
keep chanceq stdvide3q N tot
duplicates drop

* Empirical CDF within each chanceq group: running sum of the cell counts
* in increasing bin order, divided by the group size.
* The by-variable must be chanceq: `chance' was dropped by the -keep- above
* and previously only resolved through variable-name abbreviation, which
* breaks under -set varabbrev off-.
bysort chanceq (stdvide3q): gen cdf = sum(N)
replace cdf = cdf/tot

sort chanceq stdvide3q
* Generate a cumulative distribution plot, one line per chanceq group
grstyle set color cblind, select(5 9 7)
twoway (line cdf stdvide3q if chanceq == 1, sort) ///
	   (line cdf stdvide3q if chanceq == 2, sort), ///
       legend(order(1 "Negative recentered GPA_luck" 2 "Positive recentered GPA_luck") size(3)) ///
       xtitle("High school GPA") ytitle("Cumulative Distributions") ///
       title("")	   

graph export cdf_high_school_gpa.pdf, as(pdf) replace

* Put the two chanceq groups side by side: one row per GPA bin with
* cdf1/cdf2 (group CDFs), N1/N2 (bin counts) and tot1/tot2 (group sizes).
* tot is carried through the reshape because it is the sample size needed
* for the standard error below.
reshape wide cdf N tot, i(stdvide3q) j(chanceq)

* Difference between the CDF of group 1 and the CDF of group 2 at each bin.
gen diff = cdf1 - cdf2

* Standard error of the difference of two independent empirical CDFs.
* The binomial variance of an empirical CDF value F is F(1-F)/n, where n is
* the size of the whole group (tot), not the number of observations that
* fall in the bin (N), which the previous version used.
gen se_diff = sqrt((cdf1*(1-cdf1)/tot1) + (cdf2*(1-cdf2)/tot2))

* Pointwise 95% confidence band (normal approximation).
gen ci_upper = diff + 1.96*se_diff
gen ci_lower = diff - 1.96*se_diff

* Group sizes are no longer needed; drop them so the variable list is the
* same as before this fix.
drop tot1 tot2

* Keep the per-bin rows in increasing bin order so the line plots below
* connect the points from left to right.
sort stdvide3q

* Append the observation-level file again so that stdvide3 is available
* for the histogram overlay; the appended rows have no diff/ci values.
cd $pathdata_processed
append using terminales_fin

cd $pathgraph

* Pick the palette and capture its first two colours right away.
grstyle set color cblind, select(5 9 7)
local line_col `r(p1)'
local hist_col `r(p2)'

* Left axis: CDF difference with its confidence band (dashed).
* Right axis: histogram of high school GPA, in percent.
twoway (line diff stdvide3q, lcolor("`line_col'") yaxis(1)) ///
       (line ci_upper stdvide3q, lcolor("`line_col'") lpattern(dash) yaxis(1)) ///
       (line ci_lower stdvide3q, lcolor("`line_col'") lpattern(dash) yaxis(1)) ///
       (histogram stdvide3, percent color("`hist_col'%30") width(0.1) yaxis(2)), ///
       title("") ///
       ytitle("Difference in CDFs of high school GPA" "between negative and positive recentered GPA_luck", size(medium)) ///
       ytitle("Percentages", size(medium) axis(2)) ///
       ylabel(-0.04(0.02)0.04, axis(1)) ///
       ylabel(-4(1)5, axis(2) nogrid) ///
       yscale(range(-0.04 0.04) axis(1)) ///
       yscale(range(-4.26 5) axis(2)) ///
       xtitle("High school GPA", size(medium)) ///
       legend(off) scale(0.7) ///
       xtick(#10) xlabel(#15, labsize(medium) angle(45)) xscale(range(-6 4))

graph export distribution2_high_school_gpa.pdf, as(pdf) replace
